function [phis, taus]=threshold_estim(Regs, S)
% this functin performs the estimation of threshold
%
%INPUTS:
%     Regs          structure containing the dependent variable and rgeressors with fields
%          .N   number of countries
%          .y   1xN cell with elements being Ti x 1 vector of the observations on the dependent variable in country i
%          .T   N x 1  vector with elementes Ti (the number of time observations for country i)
%          .x   1xN cell with elements being  Ti x h matrices of regressors 
%           (including constant, lagged dep vars, contemporaneous and lagged vars, cs averages of y and lags, cs averages of x and lags)
%          .posxi 1xN cell with elements 1x2 vectors [start,end] defining position of the levels of X in x{i}. If X is absent, then the position is [0,0] 
%          .posylags 1xN cell with elements 1x2 vectors [start,end] defining position of the lags of Y in x{i}. If ylags are absent, then the position is [0,0] 
%
%          .topool h x 1 vector of boolean variables (1 pool, 0 not)
%          .tau_range range of tau
%          .zs  1�N cell with zs{i} being kz�ktau cell zpi. Let zshj=zpi{h,j}. zshj is T_{i}�p_{z}+1 matrix containing observations on cs averages of the i-th threshold variable and its p_{z} lags. The case when p_{z}=-1 is allowed (no cross-section averages of the indicator variable).
%          .z   1�N cell   z{i} is T_{i}�kz�ktau matrix. Column vector of this matrix contains observations on the threshold variable
%          .zlags 1xNcell with zlags{i} being kz�ktau cell zgi. 
%
%     S             number of replications to compute simulated critical values
%
%
%OUTPUTS:
%       phis        structure with the following fields
%          .phiall       ktau�1 vector containing estimates of the slope coefficient ? (for given values of ? specified in taurange)
%          .phihat       estimate of ?
%          .lr
%               .zlrmg
%               .zlrstd
%               .xlrmg
%               .xlrmg
%               .xlrstd
%               .zxlrmg
%               .zxlrstd
%               .lambdamg
%               .lambdastd
%          .tests is a structure containing the following fields:
%                 .tall       ktau�1 vector containing t-statistics (corresponding to estimates in phiall)
%                 .supt      supremum of tall
%                 .avet      average of tall
%                 .cvals is a structure containing the following fields:
%                        .cvsup    1�3 vector containing simulated 10%, 5% and 1% critical values for supt statistics
%                        .cvave    1�3 vector containing simulated 10%, 5% and 1% critical values for avet statistics
%           .cd     ktau x 2 vector with elements - CD test statistics and rhobar
%
%       taus        structure with the following fields
%           .tauall     tau_range is reproduced here
%           .tauhat     estimated tau
%           .ll         values of concentrated ll function




ktau=size(Regs.tau_range,1);
N=Regs.N;
kz=size(Regs.z{1},2);
topoolorig=Regs.topool;
klz=size(Regs.zlags{1}{1,1},2)*size(Regs.zlags{1},1);
topool=[true(klz,1)',topoolorig']';
h=size(topool,1);
ball=zeros(ktau,kz+sum(topool));
ccall=[1:N]';
kzs=size(Regs.zs{1},1);
cdall=zeros(ktau,1);
rhobarall=zeros(ktau,1);
nbr=h-sum(topool);

for ii=1:ktau
  %  ii
    dym=[]; dymnl=[];
    xp=[]; xpnl=[]; xp_pom=[];
    zm=[]; zm_pom=[];
    ccs=[];
    kiall=zeros(N,1);
    for i=1:N
        zsi=Regs.zs{i};
        
        zi=Regs.z{i};
        Ti=Regs.T(i);
        zgi=Regs.zlags{i};
        zgpi=zeros(Ti,0);
        for s=1:kz
            zgpi=[zgpi,zgi{s,ii}];
        end
        xi=[zgpi,Regs.x{i}];
        xip=xi(:,topool);
        zsiall=zeros(Ti,0);
        for sz=1:kzs
            zsiall=[zsiall,zsi{sz,ii}];
        end
        X=[xi(:,not(topool)),zsiall ];
        kiall(i,1)=size(X,2);
        Mx=eye(Ti)-X*(inv(X'*X))*X';
        Mxall{ii,i}=Mx;
        dym=[dym',(Mx*Regs.y{i})']';
        dymal{i}=(Mx*Regs.y{i})';
        zm=[zm',(Mx*zi(:,:,ii))']';     zm_pom=[zm_pom',zi(:,:,ii)']';
        zmal{i}=(Mx*zi(:,:,ii));    
        xp=[xp', (Mx*xip)']';           xp_pom=[xp_pom',xip']';
        ccs=[ccs',ones(1,Ti)*i]';
        % under null
        Xnl=[xi(:,not(topool)),zsiall ]; %Xnl=[xi(:,not(topool)),zsi{ii} ];
        kpom=size(zsiall,2);
        Mxnl=eye(Ti)-Xnl*(inv(Xnl'*Xnl))*Xnl';
        Mxallnl{ii,i}=Mxnl;
        xpnl=[xpnl', (Mxnl*xip)']';
        dymnl=[dymnl',(Mxnl*Regs.y{i})']';
        zmalnl{ii,i}=(Mxnl*zi(:,:,ii));
        xpalnl{ii,i}=(Mxnl*xip);
        evsalnl{ii, i}=[zmalnl{ii,i},xpalnl{ii,i}];
    end
    
      Tm=max(Regs.T);
      E=NaN(N,Tm);
      evs=[zm,xp];      evsnl=[xpnl];
      evsall{ii}=evs;   evsnlall{ii}=evsnl;
      dvs=dym;          dvsnl=dymnl;
        % estimation
       % ii
        b=evs\dvs;       bnl=evsnl\dvsnl;
        ball(ii,:)=b';   ballnl{ii}=bnl;
        e=dvs-evs*b;     enl=dvsnl-evsnl*bnl;    
        li=zeros(N,1);
        beta=zeros(N,nbr+kpom);
        zlr=zeros(N,kz);
        lambda=zeros(N,1);
        if Regs.posxi{1}(2)==0;
            xlr=zeros(N,0);
        else
            xlr=zeros(N,Regs.posxi{1}(2)-Regs.posxi{1}(1)+1); % we assume lag orders identical across units
        end
        for i=1:N
            ccode=ccall(i,1);
            ind=ccs==ccode;
            ei=e(ind,1);
            Ti=size(ei,1);
            Tall(i,1)=Ti;
            sigi=sum(ei.*ei)/Ti;
            sigis(i,1)=sigi;
            li(i,1)=-(1/2)*(log(2*pi))-(1/2)*log(sigi)-1/2;
            einl=enl(ind,1);
            siginl=sum(einl.*einl)/Ti;
            sigisnl(i,1)=siginl;
            E(i,Tm-Ti+1:Tm)=ei';
            % compute estimates of unit-specific coefficients
                % first compute ydi=yi- xi(:,topool)*b
                      zi=Regs.z{i};
                      Ti=Regs.T(i);
                      zgi=Regs.zlags{i};
                      zgpi=zeros(Ti,0);
                      for s=1:kz
                            zgpi=[zgpi,zgi{s,ii}];
                      end
                      xi=[zgpi,Regs.x{i}];
                      zi=Regs.z{i};
                 ydi=Regs.y{i}-[zi(:,:,ii), xi(:,topool)]*b;
                 % next compute estimate of betai
                      zsi=Regs.zs{i};
                      zsiall=zeros(Ti,0);
                      for sz=1:kzs
                          zsiall=[zsiall,zsi{sz,ii}];
                      end
                      X=[xi(:,not(topool)),zsiall ];
                 beta(i,:)=(inv(X'*X)*X'*ydi)';
                 if sum(Regs.posylags{i})==0 % case without lags of y
                     if not(Regs.posxi{1}(2)==0)
                        xlr(i,:)=beta(i,Regs.posxi{i}(1):Regs.posxi{i}(2));
                     end
                     zlr(i,:)=b(1:kz,1)';  
                     lambda(i,:)=NaN;
                 else
                     if not(Regs.posxi{1}(2)==0)
                        xlr(i,:)=beta(i,Regs.posxi{i}(1):Regs.posxi{i}(2))/(1-sum(beta(i,Regs.posylags{i}(1):Regs.posylags{i}(2))));
                        lambda(i,:)=1-sum(beta(i,Regs.posylags{i}(1):Regs.posylags{i}(2) ));
                     end
                     zlr(i,:)=b(1:kz,1)'/(1-sum(beta(i,Regs.posylags{i}(1):Regs.posylags{i}(2))));
                 end
        end
        betaall{ii}=beta;
        % compute long-run lr
        %               .zlrmg
        %               .zlrstd
        %               .xlrmg
        %               .xlrmg
        %               .xlrstd
        %               .zxlrmg
        %               .zxlrstd

            [zlrmg(ii,:),zlrstd(ii,:)]=stdest(zlr); %
            [xlrmg(ii,:),xlrstd(ii,:)]=stdest(xlr);
            [lambdamg(ii,:),lambdastd(ii,:)]=stdest(lambda);
          if not(Regs.posxi{1}(2)==0)
            if kz==1
                [zxlrmg(ii,:),zxlrstd(ii,:)]=stdest(zlr+xlr(:,1));
            else % kz==2
                [zxlrmg(ii,:),zxlrstd(ii,:)]=stdest([zlr(:,1)+xlr(:,1),zlr(:,1)+zlr(:,2)+xlr(:,1)]);
            end
          else
              zxlrmg(ii,:)=zlrmg(ii,:); zxlrstd(ii,:)=zlrstd(ii,:);
          end
        [cdall(ii,1), rhobarall(ii,1)]=CDunbalanced(E);
    ll(ii,1)=ones(1,N)*li/N; 
 
 if kz==1   %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
    
    hp=size(evsalnl{1,1},2);
    qb=zeros(hp,hp);
    for i=1:N
        q{ii,i}=evsalnl{ii,i}'*evsalnl{ii,i}; %/Ti;
        qb=qb+q{ii,i};
    end
       iqb=inv(qb); %(/N)
       Sall{ii}=qb;
       iqball{ii}=iqb;
       vpom=zeros(hp,hp);
       for i=1:N
            vpom=vpom+q{ii,i}*sigisnl(i,1);
       end
       vpom=vpom; %/N
       vi=iqb*(vpom)*iqb';
       
       stdall(ii,1)=vi(1,1)^0.5;
       tst(ii,1)=abs(b(1,1)/(stdall(ii,1)));
       pvalall(ii,1)=(1-normcdf(tst(ii,1),0,1))*2;
 else % F-test for kz>1
%      % RSS unrestricted
%      RSSu=e'*e;
%      % RSS restricted
%      RSSr=enl'*enl;
%      %F-test
%      nobs=sum(Tall);
%      nparam=sum(topool)+kz+sum(kiall);
%      F(ii,1)=((RSSr-RSSu)/kz) / (RSSu/(nobs-nparam)); % F distributed with 2,nobs-nparam
%      %Fpval
%     fpval(ii,1)=1-fcdf(F(ii,1),kz,nobs-nparam);

% do wald test instead

    hp=size(evsalnl{1},2);
    qb=zeros(hp,hp);
    for i=1:N
        q{ii,i}=evsalnl{ii,i}'*evsalnl{ii,i}; %/Ti;
        qb=qb+q{ii,i};
    end
       iqb=inv(qb); %(/N)
       Sall{ii}=qb;
       vpom=zeros(hp,hp);
       for i=1:N
            vpom=vpom+q{ii,i}*sigisnl(i,1);
       end
       ivpom=inv(vpom); %/N
       iSall{ii}=ivpom;
       %bnn=zeros(size(b));
       %bnn(1:kz)=b(1:kz);
        bnnl=zeros(size(b));
        bnnl(kz+1:end,1)=bnl;
       W(ii,1)=(b-bnnl)'*Sall{ii}*iSall{ii}*Sall{ii}*(b-bnnl);

 end
    
end

[mx, ind]=max(ll);
 tauhat=Regs.tau_range(ind);
 phi=ball(ind,:);
 
if kz==1 
 supt=max(tst);
 avet=mean(tst);
 tsall=tst;
 
 %OUTPUTS:
phis.phiall=ball(:,1);
phis.phihat=phi;
phis.tests.tall=tst;
phis.tests.supt=supt;
phis.tests.avet=avet;

taus.tauall=Regs.tau_range;
taus.tauhat=tauhat;
taus.ll=ll;
else
 supt=max(W);
 avet=mean(W);
    
 %OUTPUTS:
phis.phiall=ball(:,1:kz);
phis.phihat=phi;
phis.tests.Wall=W;
phis.tests.supt=supt;
phis.tests.avet=avet;

taus.tauall=Regs.tau_range;
taus.tauhat=tauhat;
taus.ll=ll; 
end
phis.cd=[cdall,rhobarall];

lr.zlrmg=zlrmg;
lr.zlrstd=zlrstd;
lr.xlrmg=xlrmg;
lr.xlrmg=xlrmg;
lr.xlrstd=xlrstd;
lr.zxlrmg=zxlrmg;
lr.zxlrstd=zxlrstd;
lr.lambdamg=lambdamg;
lr.lambdastd=lambdastd;
phis.lr=lr;



  %% simulate critical values
 
 % get new eta
 %tic
 Rcv=S;
 supts=zeros(Rcv,1);
 avets=zeros(Rcv,1);
 tss=zeros(Rcv,3);
 
 meta=cell(1,N);

 for rcv=1:Rcv

    tstsim=zeros(ktau,1);

    for ii=1:ktau;
        
     for i=1:N
        Ti=Tall(i,1);
        etai=randn(Ti,1); %randn(N,Te).*(sigs*ones(1,Te));
        ysi=etai+xpalnl{ii,i}*ballnl{ii}; %evsalnl{ii,i};
        Mx=Mxall{ii,i};
        meta{i}=(Mx*ysi)';
     end
     dvs=[];
     for i=1:N
        dvs=[dvs',meta{i}]'; 
     end
        evs=evsall{ii}; evsnl=evsnlall{ii};
        b=evs\dvs;  bnl=evsnl\dvs;
        e=dvs-evs*b; 
        enl=dvs-evsnl*bnl;  
       
            for i=1:N
                ccode=ccall(i,1);
                ind=ccs==ccode;
                Ti=size(ei,1);
                einl=enl(ind,1);
                siginl=sum(einl.*einl)/Ti;
                sigisnl(i,1)=siginl;
            end
     if kz==1
       
        iqb=iqball{ii};
        vpom=zeros(hp,hp);
           for i=1:N
                vpom=vpom+q{ii,i}*sigisnl(i,1);
           end
           vpom=vpom; %/N
           vi=iqb*(vpom)*iqb';
    
           stde=vi(1,1)^0.5;
           tstsim(ii,1)=abs(b(1,1)/(stde));
     else % F-test
%         % RSS unrestricted
%         RSSu=e'*e;
%         % RSS restricted
%         RSSr=enl'*enl;
%         %F-test
%         nobs=sum(Tall);
%         nparam=sum(topool)+kz+sum(kiall);
%         F(ii,1)=((RSSr-RSSu)/kz) / (RSSu/(nobs-nparam)); % F distributed with 2,nobs-nparam

%Waldt test


       vpom=zeros(hp,hp);
       for i=1:N
            vpom=vpom+q{ii,i}*sigisnl(i,1);
       end
       ivpom=inv(vpom); %/N
        bnnl=zeros(size(b));
        bnnl(kz+1:end,1)=bnl;
       W(ii,1)=(b-bnnl)'*Sall{ii}*ivpom*Sall{ii}*(b-bnnl);
     end % kz
    end
    if kz==1
        supts(rcv,1)=max(tstsim);
        avets(rcv,1)=mean(tstsim);
    else
        supts(rcv,1)=max(W);
        avets(rcv,1)=mean(W);
    end
 end
 phis.tests.cvals.cvsup=quantile(supts,[0.9,0.95,0.99]);
 phis.tests.cvals.cvave=quantile(avets,[0.9,0.95,0.99]);
 
end






